%% Code description
% This code estimates the demand parameters. 

%% Housekeeping
clear;

% change accordingly
root_dir = '...';

%% Load data inputs
% load the estimation sample as a table
sample = readtable('estimation_sample.csv');
sample = str2double_ftn(sample);

% # of states in the 2nd period
K = 5;

% name variables in the sample
[D, N] = load_data_ftn(sample, K);
save('sample.mat')
clear sample

% choose whether to incorporate consumers that expect zero price increase
myopic = 0.42; 

% redundant
use_log_n = 1;

%% Load data on additional agg state
% Distribution of (interest rate, NH cost) states conditional on sales year. 
% mm=0 if r high & nh low 
% mm=1 if r high & nh high
% mm=2 if r low & nh low
% mm=3 if r low & nh high
D.pr_mm_yr = [1 1 2 0;...
    0 1 3 0;...
    0 0 4 0;...
    0 0 4 0;...
    0 0 3 1;...
    0 0 3 1;...
    0 0 2 2;...
    1 0 1 2]/4;

M = size(D.pr_mm_yr,2);

% each row vector in pr_mm_yr corresponds to the following sales year
D.sales_yr_mm = (2000:1:2007)';

% based on sales year, map each observation to the relevant dist of mm
pr_mm = zeros(N.ist, M);
for jj=1:N.ist
    yr_jj = D.year(jj);
    ind_jj = find(D.sales_yr_mm==yr_jj);
    pr_mm_jj = D.pr_mm_yr(ind_jj,:);
    pr_mm(jj,:) = pr_mm_jj;
end
D.pr_mm = pr_mm;

% rate increase multiplier for each mm=0,1,2,3: computed from the rate
% increase data (Online Appenedix D.1)
D.eta_mm = [17.28 21.20 24.23 23.60]/17.28; % 1 x M

% claims multiplier for each mm=0,1,2,3 (Online Appendix D.2)
D.gamma_mm = [89131 96167 89131 96167]/89131; % 1 x M

%% Parameters that are fixed
max_p1 = max(D.p1);
max_p2 = max(D.eta_mm)*max(max(D.p2_actual)); 

[pr_fc, x_fc, lapse, no_insurer_fe,...
    beta_c, beta_f, T1, T2, B1_c, B2_c, B1_f, B2_f, ...
    crra, rho, sigma_c1, sigma_c2, eulerc, ...
    N_y, pr_y, y_medi, delta, resource_y1, resource_y2] ...
    = set_parameters(K, max_p1, max_p2);

%% Starting values
% (1) alpha = consumption utility scale (price coefficient) -> internally estimated
% (2) lapse_u = one-time utility cost from lapsing the contract in t=2 -> for robustness check with endogenous lapses
% (3) variety_u = utility from # of insurers/plans in the product -> irrelevant 
% (4) xi = insurer FE -> irrelevant

% sort unique values of NAIC
naic_unique = sort(unique(D.naic));
N.xi = length(naic_unique);

x0 = [1;... % alpha
    0;...   % lapse_u
    1;      % variety_u 
    zeros(length(naic_unique),1)];  % insurer FE

%% Minimization
LBar = -10*ones(size(x0));
UBar = 10*ones(size(x0));
LBar(1) = 0; 

% initiate
x_new = x0;

% indices of parameters in x0 that are being estimated
if no_insurer_fe==0
    if lapse==1 % endogenous lapses
        ind_select = [1 2 4:length(x0)];
    elseif lapse==0 % exogenous lapses
        ind_select = [1 4:length(x0)];
    end
elseif no_insurer_fe==1 % don't estimate insurer FE
    if lapse==1 % endogenous lapses
        ind_select = [1 2];
    elseif lapse==0 % exogenous lapses
        ind_select = 1; % this is the baseline case, where we only estimate alpha
    end
end

% parameters that are being estimated
x_select = x0(ind_select);
x_select_start = x0(ind_select);

% display starting values
vartype = {'alpha', 'lapse_u', 'variety_u'};
vartype_select = vartype(ind_select);
disp(' ')
disp('****************')
disp('Parameter   Starting value    ')
for hh=1:length(vartype_select)
    fprintf('%-9s %10.4f \n', ...
        vartype_select{hh}, x_select(hh));
end

% function to be minimized
ff = @(x_select)demand_objective_outer(x_select, ind_select, x_new, ...
    K, D, N,...
    pr_fc, x_fc, lapse, beta_c, crra, rho, sigma_c1, sigma_c2, eulerc,...
    N_y, pr_y, y_medi, T1, T2, B1_c, B2_c, naic_unique,...
    no_insurer_fe, delta, use_log_n, myopic,...
    resource_y1, resource_y2);

% tolerances
tolmesh = 1e-6;
tolfun = 1e-4; 
tolx = 1e-4;

% run the solver
disp(' ')
disp('****************')
disp('DEMAND ESTIMATION STARTS')
tic0=tic;

options = optimset('Display', 'iter',...
    'TolX', tolx, ...
    'TolFun', tolfun, ...
    'MaxFunEvals', 2000,...
    'MaxIter', 1000);

disp(' ')
disp('Enter fminsearch...')

[x_select_new, fval, exitflag, output] = fminsearch(ff, x_select, options);

% update x_new based on estimated parameter values
x_new(ind_select) = x_select_new;

% print
toc0 = toc(tic0);
disp(' ')
disp('****************')
disp('DEMAND ESTIMATION RESULTS')
disp('Parameter   Estimate    Starting value    ')
for hh=1:length(vartype_select)
    fprintf('%-12s %10.4f %10.4f \n', ...
        vartype_select{hh}, x_select_new(hh), x_select_start(hh));
end
disp(' ')
disp(['Time elapsed (hrs) = ', num2str(toc0/(60*60), 5)]);

%% calibration with higher elasticity (for Appendix Figure A.4)
% x_new=x0;
% x_new(1)=2.475;

%% Solve the demand model at the estimates
[obj, Delta_xi, S, v_c, v_m, s1_predicted, s1_predicted_y, pr_stay,...
    s1_predicted_y_c, s1_predicted_y_m,...
    moment_mean_all, naic_xi,...
    var_cov, N_ist_unmerge] ...
    = demand_objective_inner(x_new, K, D, N,...
    pr_fc, x_fc, lapse, beta_c, crra, rho, sigma_c1, sigma_c2, eulerc,...
    N_y, pr_y, y_medi, T1, T2, B1_c, B2_c, naic_unique,...
    no_insurer_fe, ind_select, delta, use_log_n, myopic,...
    resource_y1, resource_y2);

%% Save
% demand estimates
alpha      = x_new(1);     % consumption utility scale
lapse_u    = x_new(2);     % one-time utility cost from lapsing the contract in t=2 (relevant only with endo lapses)
variety_u  = x_new(3);     % product variety scale -> irrelevant

% (insurer,state,year) identifier in the demand estimation sample
demand_id_ist = D.id_ist;

% save
save('demand_estimates.mat', 'D', 'N', 'K', ...
    'alpha', 'lapse_u', 'variety_u', 'Delta_xi', ...
    'S', 'v_c', 'v_m', 's1_predicted', 's1_predicted_y', 'pr_stay', ...
    's1_predicted_y_c', 's1_predicted_y_m',...
    'demand_id_ist', 'use_log_n', 'myopic', 'resource_y1', 'resource_y2')

%% Predicted demand: analyze patterns
% - marginal dist of income
pr_income = zeros(3,1);
pr_income(1) = pr_y(1)+pr_y(4); % lowest income
pr_income(2) = pr_y(2)+pr_y(5); % middle 
pr_income(3) = pr_y(3)+pr_y(6); % high

% - marginal dist of family care
pr_ic = zeros(2,1);
pr_ic(1) = sum(pr_y(1:3)); % prob of no informal care
pr_ic(2) = sum(pr_y(4:6)); % prob of informal care

% - dist of family care conditional on income
pr_noic_y = zeros(3,1);
pr_noic_y(1) = pr_y(1)/(pr_y(1)+pr_y(4));
pr_noic_y(2) = pr_y(2)/(pr_y(2)+pr_y(5));
pr_noic_y(3) = pr_y(3)/(pr_y(3)+pr_y(6));

% store demand at the market level
pr_inside_y  = zeros(N.st, 3);
pr_inside_ic = zeros(N.st, 2);
pr_inside    = zeros(N.st,1);

pr_fc_insured   = zeros(N.st,1);
pr_fc_uninsured = zeros(N.st,1);

for mm=1:N.st
    
     iind = find(D.mkt==mm); 
     
     s1_predicted_y_mm = s1_predicted_y(:, iind);
     
     % prob of choosing inside option conditional on y
     s1_predicted_y_mm = sum(s1_predicted_y_mm,2);
     
     % integrate over family care distribution
     pr_inside_y(mm,1) = s1_predicted_y_mm(1)*pr_noic_y(1) + s1_predicted_y_mm(4)*(1-pr_noic_y(1));
     pr_inside_y(mm,2) = s1_predicted_y_mm(2)*pr_noic_y(2) + s1_predicted_y_mm(5)*(1-pr_noic_y(2));
     pr_inside_y(mm,3) = s1_predicted_y_mm(3)*pr_noic_y(3) + s1_predicted_y_mm(6)*(1-pr_noic_y(3));
     
     % integrate over income
     pr_inside_ic(mm,1) = sum(s1_predicted_y_mm(1:3).*pr_income); 
     pr_inside_ic(mm,2) = sum(s1_predicted_y_mm(4:6).*pr_income);
     
     % overall demand at the market level
     pr_inside(mm) = sum(pr_inside_y(mm,:).*pr_income',2);
     
     % to compare risk of insured vs. uninsured
     pr_fc_insured(mm) = sum(pr_fc.*(s1_predicted_y_mm/sum(s1_predicted_y_mm)));
     pr_fc_uninsured(mm) = sum(pr_fc.*((1-s1_predicted_y_mm)/sum(1-s1_predicted_y_mm)));
end

% from HRS
emp_demand_y  = [ .0610973;  .1129678;  .2215273];
emp_demand_ic = [.1687407; .1338024]; 
disp(' ')
disp(['Mean predicted insured rate = ', num2str(mean(pr_inside),4)])
disp(['Mean predicted insured rate cndtl on income '])
disp(['   Low income    = ', num2str(mean(pr_inside_y(:,1)),4), '   cf. empirical = ', num2str(emp_demand_y(1))])
disp(['   Middle income = ', num2str(mean(pr_inside_y(:,2)),4), '   cf. empirical = ', num2str(emp_demand_y(2))])
disp(['   High income   = ', num2str(mean(pr_inside_y(:,3)),4), '   cf. empirical = ', num2str(emp_demand_y(3))])
disp(['Mean predicted insured rate cndtl on family care availability '])
disp(['   No family care        = ', num2str(mean(pr_inside_ic(:,1)),4), '    cf. empirical = ', num2str(emp_demand_ic(1))])
disp(['   Family care available = ', num2str(mean(pr_inside_ic(:,2)),4), '    cf. empirical = ', num2str(emp_demand_ic(2))])
disp(' ')
disp(['Mean formal care risk of the insured   = ', num2str(mean(pr_fc_insured))])
disp(['Mean formal care risk of the uninsured = ', num2str(mean(pr_fc_uninsured))])

%% Compute standard errors 
W = inv(var_cov); 
N_star = N_ist_unmerge;

% estimates
alpha_hat   = x_new(1);
variety_hat = x_new(3);

% std
[SE_alpha, ~] = std_error(alpha_hat, variety_hat, ...
    moment_mean_all, var_cov, W, N_star,...
    K, D, N,...
    pr_fc, x_fc, lapse, beta_c, crra, rho, sigma_c1, sigma_c2, eulerc,...
    N_y, pr_y, y_medi, T1, T2, B1_c, B2_c, naic_unique,...
    no_insurer_fe, ind_select, delta, use_log_n, myopic,...
    resource_y1, resource_y2);

%% Compute elasticity
[elas_p, elas_p_mkt, major_unmerge] = elas_demand(root_dir, D, N, s1_predicted_y,...
    pr_y, N_y, resource_y1, resource_y2, ...
    alpha, variety_u, B1_c, B2_c, K, crra, rho, sigma_c1, sigma_c2, delta, beta_c, T1, T2);

% change back the directory
cd(fullfile(root_dir, 'matlab/demand_estimation_AK'));

save('demand_results_all.mat')
